The aim of this section is to present the different widgets used for the exploration of hypercubes and developped during the ODYCCEUS project. We adapt a little the initial programs for the case of octocubes that are used in IMAGEUN but the principles remains the same. Each widget will export a dataframe and a plotly figure, making possible to store the results in javascript and/or to use the table for development with another software.
We load the octocubes at different levels of time agregation and transform them in hypercubes by removing the dual dimensions of states and regions
base<-readRDS("octocubes/hc_mycorpus_covid_states_regions.RDS")
hc_day<-base$day[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_week<-base$week[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_month<-base$month[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
hc_year<-base$year[,.(tag=sum(tags),news=sum(news)),.(who,what,when,states=states1,regions=regions1)]
#### ---------------- testchi2 ----------------
#' @title Compute the average salience of the topic and test significance of deviation
#' @name what
#' @description create a table and graphic of the topic
#' @param tabtest a table with variable trial, success and null.value
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest : Threshold of estimated value requested for chi-square test
testchi2<-function(tabtest=tabtest,
minsamp = 20,
mintest = 5)
{
tab<-tabtest
n<-dim(tab)[1]
# Compute salience if sample size sufficient (default : N>20)
tab$estimate <-NA
tab$salience <-NA
tab$chi2<-NA
tab$p.value<-NA
tab$estimate<-round(tab$success/tab$trial,5)
tab$salience<-tab$estimate/tab$null.value
# Chi-square test if estimated value sufficient (default : Nij* > 5)
for (i in 1:n) {
if(tab$trial[i]*tab$null.value[i]>=mintest) {
test<-prop.test(x=tab$success[i],n=tab$trial[i], p=tab$null.value[i],
alternative = "greater")
tab$chi2[i]<-round(test$statistic,2)
tab$p.value[i]<-round(test$p.value,5)
}
}
# }
return(tab)
}
### ---------------- what ----------------
#' @title Compute the average salience of the topic
#' @name what
#' @description create a table and graphic of the topic
#' @param hc an hypercube prepared as data.table
#' @param subtop a subtag of the main tag (default = NA)
#' @param title Title of the graphic
what <- function (hc = hypercube,
what = "what",
subtop = NA,
title = "What ?")
{
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
tab<-tab[,list(news = sum(news)),by = what]
tab$pct<-100*tab$news/sum(tab$news)
p <- plot_ly(tab,
labels = ~what,
values = ~pct,
type = 'pie') %>%
layout(title = title,
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
output<-list("table" = tab, "plotly" =p)
return(output)
}
res<-what(hc_year)
res$table
res$plotly
res <-hc_year %>% filter(states != "_no_") %>%
what(what = "states",
subtop ="RUS",
title = "Share of Russia in state news")
res$table
res$plotly
res <-hc_year %>% filter(regions != "_no_") %>%
what(what = "regions",
subtop ="OR_EU",
title = "Share of EU in macroregional news")
res$table
res$plotly
#### ---------------- who.what ----------------
#' @title visualize variation of the topic between media
#' @name who.what
#' @description create a table of variation of the topic by media
#' @param hc an hypercube prepared as data.table
#' @param test : visualize test (TRUE) or salience (FALSE)
#' @param minsamp : Threshold of sample size requested for salience computation
#' @param mintest sample size of estimate for chi-square test (default = 5)
#' @param title Title of the graphic
who.what <- function (hc = hypercube,
what = "what",
subtop = NA,
test = FALSE,
minsamp = 20,
mintest = 5,
title = "Who says What ?")
{
tab<-hc
tab$what<-tab[[what]]
if (is.na(subtop)){tab$what <-tab$what !="_no_"}else {tab$what <- tab$what == subtop}
# {tab$what <-tab$what !="_no_"}
tab<-tab[,list(trial = sum(news),success=round(sum(news*what),0)),by = list(who)]
ref <-round(sum(tab$success)/sum(tab$trial),4)
tab$null.value<-ref
tab<-testchi2(tabtest=tab,
minsamp = minsamp,
mintest = mintest)
if (test==FALSE) {tab$index =tab$salience
tab<-tab[tab$trial > minsamp,]
mycol<-brewer.pal(7,"YlOrRd")
}
else {tab$index=tab$p.value
tab<-tab[tab$trial*tab$null.value>mintest,]
mycol<-brewer.pal(7,"RdYlBu")
mycol[4]<-"lightyellow"
}
p <- plot_ly(tab,
x = ~who,
y = ~estimate*100,
color= ~index,
colors= mycol,
hoverinfo = "text",
text = ~paste('Source: ',who,
'<br /> Total news : ', round(trial,0),
'<br /> Topic news : ', round(success,0),
'<br /> % observed : ', round(estimate*100,2),'%',
'<br /> % estimated : ', round(null.value*100,2),'%',
'<br /> Salience : ', round(salience,2),
'<br /> p.value : ', round(p.value,4)),
type = "bar") %>%
layout(title = title,
yaxis = list(title = "% news"),
barmode = 'stack')
output<-list("table" = tab, "plotly" =p)
return(output)
}
who.what(hc_year)
$table
$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays
NA
res <-hc_year %>% filter(states != "_no_") %>%
who.what(what = "states",
subtop ="RUS",
title = "Share of Russia in international news",
test=TRUE)
res$table
res$plotly
res <-hc_year %>% filter(regions != "_no_") %>%
who.what(what = "regions",
subtop ="OR_EU",
title = "Share of EU in macroregional news",
test=TRUE)
res$table
res$plotly
Warning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arraysWarning: textfont.color doesn't (yet) support data arrays